{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 首先 import 必要的模块\n",
    "import pandas as pd \n",
    "import numpy as np\n",
    "\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pregnants</th>\n",
       "      <th>Plasma_glucose_concentration</th>\n",
       "      <th>blood_pressure</th>\n",
       "      <th>Triceps_skin_fold_thickness</th>\n",
       "      <th>serum_insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>Diabetes_pedigree_function</th>\n",
       "      <th>Age</th>\n",
       "      <th>Target</th>\n",
       "      <th>pregnants_tfidf</th>\n",
       "      <th>Plasma_glucose_concentration_tfidf</th>\n",
       "      <th>blood_pressure_tfidf</th>\n",
       "      <th>Triceps_skin_fold_thickness_tfidf</th>\n",
       "      <th>serum_insulin_tfidf</th>\n",
       "      <th>BMI_tfidf</th>\n",
       "      <th>Diabetes_pedigree_function_tfidf</th>\n",
       "      <th>Age_tfidf</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0.639947</td>\n",
       "      <td>0.866045</td>\n",
       "      <td>-0.031990</td>\n",
       "      <td>0.670643</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>0.166619</td>\n",
       "      <td>0.468492</td>\n",
       "      <td>1.425995</td>\n",
       "      <td>1</td>\n",
       "      <td>0.878430</td>\n",
       "      <td>1.247145</td>\n",
       "      <td>-0.004133</td>\n",
       "      <td>1.040979</td>\n",
       "      <td>-0.193155</td>\n",
       "      <td>0.283901</td>\n",
       "      <td>0.799306</td>\n",
       "      <td>2.020625</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>-0.844885</td>\n",
       "      <td>-1.205066</td>\n",
       "      <td>-0.528319</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>-0.852200</td>\n",
       "      <td>-0.365061</td>\n",
       "      <td>-0.190672</td>\n",
       "      <td>0</td>\n",
       "      <td>-1.167062</td>\n",
       "      <td>-1.667884</td>\n",
       "      <td>-0.760155</td>\n",
       "      <td>0.017170</td>\n",
       "      <td>-0.219950</td>\n",
       "      <td>-1.228865</td>\n",
       "      <td>-0.454561</td>\n",
       "      <td>-0.180151</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>1.233880</td>\n",
       "      <td>2.016662</td>\n",
       "      <td>-0.693761</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>-1.332500</td>\n",
       "      <td>0.604397</td>\n",
       "      <td>-0.105584</td>\n",
       "      <td>1</td>\n",
       "      <td>1.156315</td>\n",
       "      <td>1.952868</td>\n",
       "      <td>-0.631586</td>\n",
       "      <td>0.024333</td>\n",
       "      <td>-0.087027</td>\n",
       "      <td>-1.228352</td>\n",
       "      <td>0.723964</td>\n",
       "      <td>-0.000985</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>-0.844885</td>\n",
       "      <td>-1.073567</td>\n",
       "      <td>-0.528319</td>\n",
       "      <td>-0.695245</td>\n",
       "      <td>-0.540642</td>\n",
       "      <td>-0.633881</td>\n",
       "      <td>-0.920763</td>\n",
       "      <td>-1.041549</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.928254</td>\n",
       "      <td>-1.167270</td>\n",
       "      <td>-0.600958</td>\n",
       "      <td>-0.862291</td>\n",
       "      <td>-0.730767</td>\n",
       "      <td>-0.711486</td>\n",
       "      <td>-1.039084</td>\n",
       "      <td>-1.116725</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>-1.141852</td>\n",
       "      <td>0.504422</td>\n",
       "      <td>-2.679076</td>\n",
       "      <td>0.670643</td>\n",
       "      <td>0.316566</td>\n",
       "      <td>1.549303</td>\n",
       "      <td>5.484909</td>\n",
       "      <td>-0.020496</td>\n",
       "      <td>1</td>\n",
       "      <td>-0.427033</td>\n",
       "      <td>0.289507</td>\n",
       "      <td>-1.113578</td>\n",
       "      <td>0.344739</td>\n",
       "      <td>0.331189</td>\n",
       "      <td>0.713412</td>\n",
       "      <td>2.562834</td>\n",
       "      <td>0.088726</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pregnants  Plasma_glucose_concentration  blood_pressure  \\\n",
       "0   0.639947                      0.866045       -0.031990   \n",
       "1  -0.844885                     -1.205066       -0.528319   \n",
       "2   1.233880                      2.016662       -0.693761   \n",
       "3  -0.844885                     -1.073567       -0.528319   \n",
       "4  -1.141852                      0.504422       -2.679076   \n",
       "\n",
       "   Triceps_skin_fold_thickness  serum_insulin       BMI  \\\n",
       "0                     0.670643      -0.181541  0.166619   \n",
       "1                    -0.012301      -0.181541 -0.852200   \n",
       "2                    -0.012301      -0.181541 -1.332500   \n",
       "3                    -0.695245      -0.540642 -0.633881   \n",
       "4                     0.670643       0.316566  1.549303   \n",
       "\n",
       "   Diabetes_pedigree_function       Age  Target  pregnants_tfidf  \\\n",
       "0                    0.468492  1.425995       1         0.878430   \n",
       "1                   -0.365061 -0.190672       0        -1.167062   \n",
       "2                    0.604397 -0.105584       1         1.156315   \n",
       "3                   -0.920763 -1.041549       0        -0.928254   \n",
       "4                    5.484909 -0.020496       1        -0.427033   \n",
       "\n",
       "   Plasma_glucose_concentration_tfidf  blood_pressure_tfidf  \\\n",
       "0                            1.247145             -0.004133   \n",
       "1                           -1.667884             -0.760155   \n",
       "2                            1.952868             -0.631586   \n",
       "3                           -1.167270             -0.600958   \n",
       "4                            0.289507             -1.113578   \n",
       "\n",
       "   Triceps_skin_fold_thickness_tfidf  serum_insulin_tfidf  BMI_tfidf  \\\n",
       "0                           1.040979            -0.193155   0.283901   \n",
       "1                           0.017170            -0.219950  -1.228865   \n",
       "2                           0.024333            -0.087027  -1.228352   \n",
       "3                          -0.862291            -0.730767  -0.711486   \n",
       "4                           0.344739             0.331189   0.713412   \n",
       "\n",
       "   Diabetes_pedigree_function_tfidf  Age_tfidf  \n",
       "0                          0.799306   2.020625  \n",
       "1                         -0.454561  -0.180151  \n",
       "2                          0.723964  -0.000985  \n",
       "3                         -1.039084  -1.116725  \n",
       "4                          2.562834   0.088726  "
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the two pre-computed feature tables. The second file carries its own\n",
    "# 'target' column, which is dropped so the label is only present once ('Target').\n",
    "train1 = pd.read_csv(\"FE_pima-indians-diabetes.csv\")\n",
    "train2 = pd.read_csv(\"diabetes_train_tfidf.csv\").drop(columns=[\"target\"])\n",
    "\n",
    "# Put both tables side by side; pandas matches the rows on their index.\n",
    "train = pd.concat([train1, train2], axis=1)\n",
    "\n",
    "# The intermediate frames are not needed once they have been combined.\n",
    "del train1, train2\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Separate the label column from the feature columns.\n",
    "y_train = train['Target']\n",
    "X_train = train.drop(columns=['Target'])\n",
    "\n",
    "# Keep the feature names for later reference.\n",
    "feat_names = X_train.columns\n",
    "\n",
    "# Original author's notes on sparse input:\n",
    "# - most sklearn estimators accept sparse data, which can make training much faster;\n",
    "# - to check whether an estimator does, see whether its fit() documents\n",
    "#   X as {array-like, sparse matrix};\n",
    "# - use timeit to compare dense vs. sparse training time yourself.\n",
    "from scipy.sparse import csr_matrix\n",
    "\n",
    "# Stored under its own name: `train` stays a DataFrame, so this cell can be re-run\n",
    "# without failing on train['Target'].\n",
    "X_train_sparse = csr_matrix(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "# Baseline logistic regression with the default regularisation (penalty='l2', C=1.0).\n",
    "# The solver is named explicitly: 'liblinear' was the default before scikit-learn 0.22,\n",
    "# so pinning it silences the FutureWarning and keeps the results the same once the\n",
    "# library default becomes 'lbfgs'.\n",
    "lr = LogisticRegression(solver='liblinear')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "logloss of each fold is:  [0.51845651 0.55182829 0.45826533 0.42667647 0.47296192]\n",
      "cv logloss is: 0.4856377034083422\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import cross_val_score\n",
    "\n",
    "# 5-fold cross-validation. The 'neg_log_loss' scorer returns the negated log loss,\n",
    "# so the sign is flipped back before printing.\n",
    "loss = cross_val_score(estimator=lr, X=X_train, y=y_train, scoring='neg_log_loss', cv=5)\n",
    "\n",
    "# Optional timing comparison (requires a sparse copy of X_train named X_train_sparse):\n",
    "# %timeit loss_sparse = cross_val_score(lr, X_train_sparse, y_train, cv=3, scoring='neg_log_loss')\n",
    "fold_logloss = -loss\n",
    "print('logloss of each fold is: ', fold_logloss)\n",
    "print('cv logloss is:', -loss.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=5, error_score='raise-deprecating',\n",
       "             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,\n",
       "                                          fit_intercept=True,\n",
       "                                          intercept_scaling=1, l1_ratio=None,\n",
       "                                          max_iter=100, multi_class='warn',\n",
       "                                          n_jobs=None, penalty='l2',\n",
       "                                          random_state=None, solver='warn',\n",
       "                                          tol=0.0001, verbose=0,\n",
       "                                          warm_start=False),\n",
       "             iid='warn', n_jobs=None,\n",
       "             param_grid={'C': [0.1, 1, 10, 100, 1000], 'penalty': ['l1', 'l2']},\n",
       "             pre_dispatch='2*n_jobs', refit=True, return_train_score=True,\n",
       "             scoring='neg_log_loss', verbose=0)"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Hyper-parameters to tune: the penalty type and the inverse regularisation strength C.\n",
    "# Exercise from the original author: try tuning L1 and L2 separately, each paired\n",
    "# with a suitable solver.\n",
    "penaltys = ['l1', 'l2']\n",
    "\n",
    "# Author's note: with plenty of training data C can be larger (trust the data more).\n",
    "# NOTE(review): the recorded best C (0.1) is the smallest value in this grid;\n",
    "# consider extending it downwards (e.g. 0.001, 0.01) to confirm the optimum.\n",
    "Cs = [0.1, 1, 10, 100, 1000]\n",
    "tuned_parameters = dict(penalty=penaltys, C=Cs)\n",
    "\n",
    "# 'liblinear' supports both 'l1' and 'l2'. It is named explicitly so the search\n",
    "# keeps working once the library default becomes 'lbfgs', which does not support\n",
    "# 'l1'; this also silences the FutureWarning.\n",
    "lr_penalty = LogisticRegression(solver='liblinear')\n",
    "grid = GridSearchCV(\n",
    "    lr_penalty,\n",
    "    tuned_parameters,\n",
    "    cv=5,\n",
    "    scoring='neg_log_loss',\n",
    "    return_train_score=True,\n",
    ")\n",
    "grid.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.4770298527015175\n",
      "{'C': 0.1, 'penalty': 'l2'}\n"
     ]
    }
   ],
   "source": [
    "# Best mean cross-validated log loss (sign flipped back from the 'neg_log_loss'\n",
    "# scorer), followed by the parameter combination that achieved it.\n",
    "best_logloss = -grid.best_score_\n",
    "print(best_logloss)\n",
    "print(grid.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEGCAYAAAB/+QKOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3de3RV9Z338ff3nJwkXHKBEC4hQdCCllBBjYD1UrW1MtaBtmp1erWXsV0ty5muqTN2ZpYdaZ9en5n26TM+T72MrU77CK0dO1SptLa1tl1FiRSUgAilVkKiRCgJyCU5yff5Y+8kJ4edcALZOSF8XmudlbP3/u1zvjmQ/Tn7t/f+bXN3REREsiXyXYCIiIxMCggREYmkgBARkUgKCBERiaSAEBGRSAX5LmCoTJo0yWfOnJnvMkRETinPPvvsa+5eGbVs1ATEzJkzqa+vz3cZIiKnFDP7U3/L1MUkIiKRFBAiIhIp1oAwsyVmts3MdpjZ7RHLbzazFjPbGD4+lrW81Mx2m9m/x1mniIgcK7ZjEGaWBO4CrgIagfVmttrdt2Q1XeXuy/t5mc8Dv4qrRhEZvTo6OmhsbOTIkSP5LmVEKC4uprq6mlQqlfM6cR6kXgjscPedAGa2ElgGZAdEJDO7AJgCPA7UxVWkiIxOjY2NlJSUMHPmTMws3+Xklbuzd+9eGhsbmTVrVs7rxdnFNB3YlTHdGM7Ldp2ZPWdmD5tZDYCZJYB/BW4b6A3M7BYzqzez+paWlqGqW0RGgSNHjlBRUXHahwOAmVFRUTHovak4AyLqXyV76NgfAzPd/VzgCeCBcP4ngTXuvosBuPs97l7n7nWVlZGn8YrIaUzh0OtEPos4u5gagZqM6WqgKbOBu+/NmLwX+Er4/CLgUjP7JDAeKDSzg+5+zIFuEZGhcuPdvwNg1ccvynMlI0OcexDrgdlmNsvMCoGbgNWZDcxsWsbkUmArgLu/z91nuPtM4DPAgwoHkdPEt98RPEaB8ePH9zxfsmQJ5eXlXHvttZFtP/WpT7FgwQLmzp3LmDFjWLBgAQsWLODhhx8e8D0ON7/A4eYXeqY3bNjA448/PiT1x7YH4e5pM1sOrAWSwP3u3mBmK4B6d18N3GpmS4E0sA+4Oa56ROTU0NDcCkBtnusYarfddhuHDh3i7rvvjlx+1113AfDSSy9x7bXXsnHjxhN6nw0bNrB582aWLFlywrV2i/U6CHdf4+5z3P0sd/8f4bw7wnDA3T/r7rXuPt/dr3D3FyJe4zsDnAYrInJKeOtb30pJSckJrbt9+3auvvpqLrjgAi677DJefPFFAFauXEndFX/Jore9kyuuuILDhw+zYsUKvve97+W093E8o2YsJpERq7u75MOP5beO09idP25gS1PbcdttaQ7adB+LGMjcqlI+95fDs59zyy23cN9993HWWWfx29/+luXLl/PTn/6UO++8k598/9tMqZzE0TFTGTNmDHfccQebN2/mG9/4xkm/rwJCJGajtctEhsf+/ftZt24d1113Xc+8dDoNwMUXX8xf33o777r2am76yKeG/L0VECIy6uX6TX8knsXk7kyaNCnymMS9997LU4+tYs0TTzJ//nyee+65IX1vDdYnIjKCTZgwgWnTpvHII48A0NXVxaZNmwDYuXMnCy9YwOf+/m+YMGECu3fvpqSkhAMHDgzJeysgRESGwaWXXsoNN9zAz3/+c6qrq1m7dm3O665cuZJvfetbzJ8/n9raWh599FEAPv3pT3PhlUu58MqlvO1tb2PevHlceeWVbNq0ifPOO08HqUVERqqDBw/2PP/1r3+d0zozZ85k8+bNfeadeeaZkYGyevXqnmsgxkw7B4DKysohu3maAkJEJDSSjj2MBOpiEhGRSAoIERGJpIAQEZFICggREYmkgBAR6TaKRpIdCgoIEZGYdA/3vXHjRi666CJqa2s599xzWbVq1TFt
T3S472ynxHDfIiISGDt2LA8++CCzZ8+mqamJCy64gKuvvpry8vKeNqfdcN8ySmk3XGRQ5syZw+zZswGoqqpi8uTJtLS05Ly+hvsWEYnLT26HV54/frtXwsHucvkCNPVN8BdfHnQpzzzzDO3t7Zx11lk5r6PhvkVERrnm5mY+8IEP8MADD5BI5NaBo+G+RUTilOs3/Rhv7tTW1sY73vEOvvCFL7B48eKc19Nw3yIio1h7ezvvete7+OAHP8gNN9wwqHU13LeIyCj2/e9/n6eeeorvfOc7PaevDuYsJQ33LSIyynQP9/3+97+f97///Tmto+G+RURGohiOPZzK1MUkIiKRFBAiMmq5e75LGDFO5LNQQIjIqFRcXMzevXsVEgThsHfvXoqLiwe1no5BiMioVF1dTWNj46CGtDgVdbS+AkBq/8BBWFxcTHV19aBeWwEhg9bQ3ApAbZ7rEBlIKpVi1qxZ+S4jdg1f/GsA3viPvxny11YXk4iIRFJAiIhIJAWEiIhEUkCIiEikWAPCzJaY2TYz22Fmt0csv9nMWsxsY/j4WDh/gZn9zswazOw5M7sxzjpFRORYsZ3FZGZJ4C7gKqARWG9mq919S1bTVe6+PGveIeCD7r7dzKqAZ81srbvvj6teERHpK849iIXADnff6e7twEpgWS4ruvuL7r49fN4E7AEqY6tURESOEWdATAd2ZUw3hvOyXRd2Iz1sZjXZC81sIVAI/CGeMkVEJEqcAWER87Iv9fsxMNPdzwWeAB7o8wJm04D/BD7s7l3HvIHZLWZWb2b1o/1qSRGR4RZnQDQCmXsE1UBTZgN33+vuR8PJe4ELupeZWSnwGPDP7r4u6g3c/R53r3P3uspK9UCJiAylOANiPTDbzGaZWSFwE7A6s0G4h9BtKbA1nF8IPAI86O4/iLFGERHpR2xnMbl72syWA2uBJHC/uzeY2Qqg3t1XA7ea2VIgDewDbg5Xfw9wGVBhZt3zbnb33O/RJyIiJyXWwfrcfQ2wJmveHRnPPwt8NmK97wLfjbM2EREZmK6kFhGRSAoIERGJpIAQEZFICggREYmkgBARkUgKCBERiaSAEBGRSAoIERGJpIAQEZFICggREYmkgAD49juCh4iI9FBAiIhIpFgH6ztVNDS3AlCb5zpEREYS7UGIiEgkBYSIiERSQIiISCQFhIiIRFJAiIhIJAWEiIhEUkCIiEgkBYSIiERSQIiISCQFhIiIRFJAiIhIJAWEiIhEUkCIiEgkBYSIiERSQIiISCQFhIiIRFJAiIhIJAWEiIhEijUgzGyJmW0zsx1mdnvE8pvNrMXMNoaPj2Us+5CZbQ8fH4qzThEROVZs96Q2syRwF3AV0AisN7PV7r4lq+kqd1+ete5E4HNAHeDAs+G6f46rXhER6SvOPYiFwA533+nu7cBKYFmO614N/Mzd94Wh8DNgSUx1iohIhDgDYjqwK2O6MZyX7Toze87MHjazmkGuKyIiMYkzICxinmdN/xiY6e7nAk8ADwxiXczsFjOrN7P6lpaWkypWRET6ijMgGoGajOlqoCmzgbvvdfej4eS9wAW5rhuuf4+717l7XWVl5ZAVLiIi8QbEemC2mc0ys0LgJmB1ZgMzm5YxuRTYGj5fC7zdzCaY2QTg7eE8EREZJrGdxeTuaTNbTrBhTwL3u3uDma0A6t19NXCrmS0F0sA+4OZw3X1m9nmCkAFY4e774qpVRESOFVtAALj7GmBN1rw7Mp5/FvhsP+veD9wfZ30iItI/XUktIiKRBh0QZpYws9I4ihERkZEjp4Aws/9nZqVmNg7YAmwzs9viLU1ERPIp1z2Iue7eBryT4JjCDOADsVUlIiJ5l2tApMwsRRAQ/+3uHURcuCYiIqNHrgFxN/ASMA54yszOANriKkpERPIvp9Nc3f2bwDczZv3JzK6IpyQRERkJcj1I/TfhQWozs/8wsw3AlTHXJiIieZRrF9NHwoPUbwcqgQ8DX46tKhERybtcA6J7
dNVrgG+7+yaiR1wVEZFRIteAeNbMfkoQEGvNrAToiq8sERHJt1zHYvoosADY6e6HzKyCoJtJRERGqVzPYuoys2rgvWYG8Ct3/3GslYlIfnR1QVc6+tHZET7vhK6OjPmZ7TrC5b3t0+kO0ul2Ojo66OzoIN3RTjrdQWfWo6uzg8PtaQC2fmc5eHi5lXt44ZX3ziO8GMudnsuyHPp0bnj3kq6MK7c84/V6XiTjNbLfk2OXHTMdzHPvXWaZNfc0yVg/o21WMRnv7z19+Y5j2e3c6TjSzkEbSxxyCggz+zJwIfC9cNatZvbmcDRWEYnSmYYDzYzpep0kXbDt8WM3oDlvdLM30p0ZrxWu29kxYHvvSuNhG+88dmNuXWnM05gPfe9xQfgoHujjciNNkk6SdJKAP74EZG4yrc+0ZxwG9UEs632dzDbZbY89xBq9zPCsNga4Rb9n3/X7aWP9telbU+97dHDUU8fUOxRy7WK6BljgHvzPMbMHgN/Tz1DdIqOeOxzaB627oG03tDYGz1vD52274UAzeBdndq/z0I2DfpsuS9JlBbgl6bIknVZAJ0m6SJK23o1pmiQdniRNgrQnafckaU/Q7gnaPUW7F9HRlaAj3ACnPUmaoH0n3c/DaU/2tiNBmgI6SeCWhGQhliggUVCAJVMkkikSyQKSBQUkCgpJJlMUpFIkClKkCgpJFhSQKiyioCBFKlVIQWGKwlQhqcJCUqkiCgtTFKUKKSwqpChVQHEqyct3v4ckXZx964+CTaKBYZgFm0gzC38Gn1HPsuMsh+42fV8rfIvwNU69c28avngJ5TG99mDuB1FOcFMfgLIYahEZOdpfDzb2bY3hxj8jBNp2B9Ppw33XSRZBWTXpkir2VS6mceJEth8pp35XG3u8nKOF5RxKJzjcaXQSbqzDDXXPdMbGupMEUScLFiYTFKUSFBUkKSpIUBw+H/BnKklxQfCzqKB3uiTV/RoD/ywqSFCQHJ67AxxJBIM0TCkdaH9DhkOuAfEl4Pdm9kuC/7GXob0HOVWFXT+93/wbe7/1t+4Knh/+c9ZKBiXToGw6TH0TzFkCZTW0FU3mxSPlbGor4dkWo6H5AH9qOtSz1uSSImZYAxWJA8w6/y09G/acNuoRP4sKEiQSp963XDk15XqQ+iEze5LgOIQB/+Dur8RZmMgJcQ827t0b+tbdWd1Au+FAE2T3sxeXQVkNlFVD9cLgZ/ejdDpeMo3dB9I0NLXRsLuVhqY2Nj/byqttR4HDwGFmTBxLbVUp76mrYW5VKbVVpUwuKabhi/8CQO3Sfx7uT0PkpAwYEGZ2ftasxvBnlZlVufuGeMoS6Uf7oYxv+t39/VndQJFdP9OhdDrMuizc8E8PN/7h86KSnuZdXc4f977O5t2tbGloY3NTMw1N29h/qAOAhMEbJo/nzWdNoraqlNqqMuZWlVI2Jp4DhSL5crw9iH8dYJmj8ZhkKHWm4eArvd/6e7p9MrqBDu/LWsmgZGqw8Z9SC3Ou7vnWHwRBDYyb1HvEMkt7uovtew7Q0LSrZ89gS3Mbh9o7gaC//+ypJfzFvKnMrSpjXlUp50wtZUxhMuYPQyT/BgwId9eIrTI0erp+svv7MwLgQDN4Z9/1isp6u3qq6zK+9Yff/EuqoKAwpxIOtafZ2nyALU2tbN7dRkNzKy++cpD2zqC7aVxhkrlhF1H3nsHsKeNJDdPBWZGRJtfrIN4dMbsVeN7d9wxtSXJK6jgc0d/f2HcvoONQ33WShb3f9GddmvHNv6a3S6j4xG5/3nqog4bmVhp2t9HQ1MrmpjZ2thykKzzpfMLYFLVVZXz4kpnUhnsGMyvG6QCwSIbBDLVxEfDLcPpyYB0wx8xWuPt/xlCbjDTpdnjlOSZ2vkaht8ND7+3t/z+099j246cGG/rJb4Q3XJVx4DcMgbGTIHHy3873HDjSGwThnsGufb3HIaaWFjNveinXvGka86pKqZ1eRlVZ8Sl5zrvIcMo1ILqAN7r7qwBm
NgX4v8Ai4ClAATEaHdoHu56BXevg5aehaQOkjzANgnP0//zHYINfdX7fs37KqgfV9ZMrd6fxz4d7gyDcM2g5cLSnzcyKsZxbXc5fLZzBvKoyaqtKqRhfNKR1iJwucg2Imd3hENoDzHH3fWbWEUNdMtzcYe8fwjBYFwTDa9uCZYkCmHou1H0Eahax7UdfIm0paj/5m9jK6exy/vjawd4gCH+2HQnG6UkmjNmTx3Pp7Ek9QTC3qpSSYp1JJDJUcg2IX5vZo8APwunrCe5NPQ7YH0tlEq+OI9C8MQyDp4NHdzdRcRnULIJz3wMzFgd7CIW9g4Gl//t/DmkpR9OdbH/1YJ8g2Np8gMMd4ZlEBQneOLWEa+dXUVtVyryqMs6eWkJxSmcSicQp14D4FPBu4BKCC+UeAH7o7g7oTKdTwcGWMAjC7qLmjdDZHiybeBbMvhpmLIKaxTBpzpAcG4jy+tE0W5vbggvOwkDYvucAHZ3B0ePxRQXMrSrlpoU1wZ7B9FLOqtSZRCL5kOuV1G5mvwHaCa5/eCYMBxmJurrgtRd7w2DXOti3M1iWLISq82DRx4MwqFkE4ytjKWP/ofbgiuPuK4+bWvnja6/3jHhcMa6Q2ullvOXsyp49gxkTx+pMIpERItfTXN8DfA14kmAP4n+b2W3u/nCMtUmu2g8FB5B7uouegSNhz9/YiiAIzv9Q0F00bQGkhnYQNHdnz4GjvUEQ/ty9v/dMounlY5hbVcqy+dODMJhexpTSIp1JJDKC5drF9E/Ahd3XPJhZJfAEoIDIh7bmYK9g1zNBKLzyXDC2P8Cks2Hu0t69g4qz+r2KeLDa0120Hu7g5c4KXu6axGOPv9DTVfTawaC7ygxmVYzj/DMm8IGLzmBeOAzFxHFDe0aTiMQv14BIZF0Qt5fc72ctJ6OrE/Zs6T2zaNc62P9ysKygGKZfAG++Ndg7qL4Qxk4c8OU6OoONfM/jUEff6exHxvLug8ZwS/D2T+1k9pQSLj97cs/1BW+cVsr4osGMIi8iI1Wuf8mPm9la4KFw+kZgTTwlneaOHoDG+t4zixrr4WgwPj7jp0DNIjov/DgHJ1/A3pKz2d9utB7uoO1QB62b2mg9tLfPBn7/4Q7aMqa7xxjqz7jCJGVjUpSOSVE2JsUZFWMpC5+Xjw1+tv30i0xP7GPJ7at0JpHIKJbrQerbzOw64GKCYxD3uPsjx1vPzJYA/wtIAve5+5f7aXc9wSm0F7p7vZmlgPuA88MaH3T3L+VS66ki3dlF25E0B179I/6n35FqWs+4Pc9S2rqNBF04xitFs9hW+BaeLz6H+q45bD86kdaGNK//vpPg3k2/i3ztseFGvntDXzNxLPPGpCgP55WNTfUJgcxHLmcLNfxyC4DCQWSUy7kvwN1/CPww1/ZmlgTuAq4iGCZ8vZmtdvctWe1KgFuBpzNm3wAUufubzGwssMXMHnL3l3J9/+HQvZGP6prp/ta+/1B7z7wDh44y5dB2Zh/dzJu6XuD8xIucYcHopK97Eb/vegPP+jKe7ZrD1uQ5JL2MskTvxvzNY4/doGdu6MvHpigtTlFYoN4/ETl5x7sfxAEg6nRWIzj7daCR1BYCO9x9Z/haK4FlwJasdp8Hvgp8JmOeA+PMrAAYQ3B6bdtAtZ6ori6nrWsMBykmvWv/cfvhMwPgwNH0gK89KXWEi4t2clnyReb7NmZ3bKPIj4DBgbFTaClfyKbJ53N02oUkp81j6rixvHdMik+MKaCoQN/ORSS/jjfcd8lAy49jOrArY7qRYOymHmZ2HlDj7o+aWWZAPEwQJs3AWODT7p59IwDM7BbCI6YzZsw4oSJfO3iUvzr4t8HEXb89ZnlRQaLPt/Wq8mLOmVZy7Df54gIqO19hyv7fU9qygaLm9SRaXoC0Q2ciuE1lzYegZiHMWExJWTUn8+GKiMQtztNNos6t7NkbMbME8HXg5oh2C4FOoAqYQDDUxxPdeyM9L+Z+D3APQF1d3QlduFc+
tpBbin7GeDvC3Pfc2XMgtrvrpt9+9nBkU15eB9vDM4wOhsNVFZUGZxTNe3cQCNProGj8iZQnIpI3cQZEI1CTMV0NNGVMlwDzgCfDi6WmAqvNbCnwXuBxd+8A9pjZb4E6oE9ADIXCggTLiuoBqJ07pf+G/YxsCkD5GXDm5cF1BzWLguGtE+oiEpFTW5wBsR6YbWazgN3ATQQbfgDcvRWY1D1tZk8CnwnPYnorcKWZfZegi2kx8I0Ya+3LHfbuCE4z7b46+bUXg2WJApg2H+o+Go5dtCi45aWIyCgTW0C4e9rMlgNrCU5zvd/dG8xsBVDv7qsHWP0u4NvAZoKuqm+7+3Nx1WrexRg/DL/5RsTIpuVBCMy/Kbg6ueq8PiObioiMVrFe8urua8i6oM7d7+in7eUZzw8SnOoav/0vc07HFhI4PPE5qHgDzFkShMKMxVAxO7aRTUVERjKNiVBWw97EJA4nxjLj07+AcZOOv46IyGlAAWHGnoLwGILCQUSkh/pOREQkkgJCREQiKSBERCSSAkJERCIpIEREJJICQkREIikgREQkkgJCREQiKSBERCSSAkJERCIpIEREJJICQkREIikgREQkkgJCREQiKSBERCSSAkJERCIpIEREJJICQkREIikgREQkkgJCREQiKSBERCSSAkJERCIpIEREJJICQkREIikgREQkkgJCREQiKSBERCSSAkJERCIpIEREJFKsAWFmS8xsm5ntMLPbB2h3vZm5mdVlzDvXzH5nZg1m9ryZFcdZq4iI9FUQ1wubWRK4C7gKaATWm9lqd9+S1a4EuBV4OmNeAfBd4APuvsnMKoCOuGoVEZFjxbkHsRDY4e473b0dWAksi2j3eeCrwJGMeW8HnnP3TQDuvtfdO2OsVUREssQZENOBXRnTjeG8HmZ2HlDj7o9mrTsHcDNba2YbzOzvo97AzG4xs3ozq29paRnK2kVETntxBoRFzPOehWYJ4OvA30W0KwAuAd4X/nyXmb31mBdzv8fd69y9rrKycmiqFhERIN6AaARqMqargaaM6RJgHvCkmb0ELAZWhweqG4Ffuftr7n4IWAOcH2OtIiKSJc6AWA/MNrNZZlYI3ASs7l7o7q3uPsndZ7r7TGAdsNTd64G1wLlmNjY8YP0WYMuxbyEiInGJLSDcPQ0sJ9jYbwW+7+4NZrbCzJYeZ90/A/9GEDIbgQ3u/lhctYqIyLFiO80VwN3XEHQPZc67o5+2l2dNf5fgVFcREckDXUktIiKRFBAiIhJJASEiIpEUECIiEkkBISIikRQQIiISSQEhIiKRFBAiIhJJASEiIpEUECIiEkkBISIikRQQIiISSQEhIiKRFBAiIhJJASEiIpEUECIiEkkBISIikRQQIiISSQEhIiKRYr0n9amidlpZvksQERlxtAchIiKRFBAiIhJJASEiIpEUECIiEkkBISIikRQQIiISSQEhIiKRFBAiIhJJF8qJxGxFxdcAWJXnOk4Vtf/4m3yXcEqJ8/NSQIjEbNXHL8p3CSInRAEB8OHH8l2BiMiIE+sxCDNbYmbbzGyHmd0+QLvrzczNrC5r/gwzO2hmn4mzThmcFRVf6+k2EZHRK7aAMLMkcBfwF8Bc4K/MbG5EuxLgVuDpiJf5OvCTuGoUEZH+xbkHsRDY4e473b0dWAksi2j3eeCrwJHMmWb2TmAn0BBjjSIi0o84j0FMB3ZlTDcCizIbmNl5QI27P5rZjWRm44B/AK4C+u1eMrNbgFsAZsyYMXSVy4B00FXk9BDnHoRFzPOehWYJgi6kv4todyfwdXc/ONAbuPs97l7n7nWVlZUnVayIiPQV5x5EI1CTMV0NNGVMlwDzgCfNDGAqsNrMlhLsaVxvZl8FyoEuMzvi7v8eY70iIpIhzoBYD8w2s1nAbuAm4L3dC929FZjUPW1mTwKfcfd64NKM+f8CHFQ4iIgMr9i6mNw9DSwH1gJbge+7e4OZrQj3EkREZAQzdz9+q1NAXV2d19fX
57sMEZFTipk96+51Ucs0WJ+IiERSQIiISCQFhIiIRFJAiIhIpFFzkNrMWoA/ncRLTAJeG6JyhpLqGhzVNTiqa3BGY11nuHvklcajJiBOlpnV93ckP59U1+CorsFRXYNzutWlLiYREYmkgBARkUgKiF735LuAfqiuwVFdg6O6Bue0qkvHIEREJJL2IEREJJICQkREIp22AWFmN5hZg5l1mVm/p4eZ2RIz22ZmO8zs9mGoa6KZ/czMtoc/J/TTrtPMNoaP1THWM+Dvb2ZFZrYqXP60mc2Mq5ZB1HSzmbVkfD4fi7um8H3vN7M9Zra5n+VmZt8M637OzM4fIXVdbmatGZ/XHcNUV42Z/dLMtoZ/i38T0WbYP7Mc6xr2z8zMis3sGTPbFNZ1Z0Sbof17dPfT8gG8ETgbeBKo66dNEvgDcCZQCGwC5sZc11eB28PntwNf6afdwWH4jI77+wOfBL4VPr8JWDUCaroZ+Pc8/J+6DDgf2NzP8muAnxDcbXEx8PQIqety4NE8fF7TgPPD5yXAixH/lsP+meVY17B/ZuFnMD58ngKeBhZntRnSv8fTdg/C3be6+7bjNFsI7HD3ne7eDqwElsVc2jLggfD5A8A7Y36/geTy+2fW+zDwVgtvEZjHmvLC3Z8C9g3QZBnwoAfWAeVmNm0E1JUX7t7s7hvC5wcI7hszPavZsH9mOdY17MLPoPs2zKnwkX2W0ZD+PZ62AZGj6cCujOlG4v+PMsXdmyH4jwpM7qddsZnVm9k6M4srRHL5/XvaeHCTqFagIqZ6cq0J4LqwS+JhM6uJWJ4P+fj/lKuLwq6Ln5hZ7XC/edgVch7Bt+JMef3MBqgL8vCZmVnSzDYCe4CfuXu/n9dQ/D3GecvRvDOzJwjudZ3tn9z9v3N5iYh5J31e8EB1DeJlZrh7k5mdCfzCzJ539z+cbG1Zcvn9Y/mMBpDL+/0YeMjdj5rZJwi+UV0ZY025Gu7PKlcbCMbjOWhm1wA/AmYP15ub2Xjgh8Dfuntb9uKIVYblMztOXXn5zNy9E1hgZuXAI2Y2z90zjy0N6ec1qgPC3d92ki/RCGR++6wGmk7yNQesy8xeNbNp7t4c7krv6ec1msKfOy24n/d5BH3zQymX37+7TaOZFei0KbMAAAOhSURBVABlxNudcdya3H1vxuS9wFdirGcwYvn/dLIyN37uvsbM/o+ZTXL32AelM7MUwUb4e+7+XxFN8vKZHa+ufH5m4XvuD//ulwCZATGkf4/qYhrYemC2mc0ys0KCgz6xnTEUWg18KHz+IeCYPR0zm2BmReHzScDFwJYYasnl98+s93rgFx4eIYvJcWvK6qNeStCHPBKsBj4YnpmzGGjt7k7MJzOb2t1PbWYLCbYLewdea0je14D/ALa6+7/102zYP7Nc6srHZ2ZmleGeA2Y2Bngb8EJWs6H9exzOo/Aj6QG8iyBtjwKvAmvD+VXAmox21xCcxfAHgq6puOuqAH4ObA9/Tgzn1wH3hc/fDDxPcAbP88BHY6znmN8fWAEsDZ8XAz8AdgDPAGcOw2d0vJq+BDSEn88vgXOG6f/UQ0Az0BH+3/oo8AngE+FyA+4K636efs6ey0NdyzM+r3XAm4eprksIuj+eAzaGj2vy/ZnlWNewf2bAucDvw7o2A3eE82P7e9RQGyIiEkldTCIiEkkBISIikRQQIiISSQEhIiKRFBAiIhJJASEyCGZ28PitBlz/4fDqd8xsvJndbWZ/CEfnfMrMFplZYfh8VF/IKiOfAkJkmITj9STdfWc46z6Cq1xnu3stwSi0kzwYhPDnwI15KVQkpIAQOQHhlb1fM7PNZva8md0Yzk+Ewy40mNmjZrbGzK4PV3sf4ZXxZnYWsAj4Z3fvgmDYFHd/LGz7o7C9SN5oF1bkxLwbWADMByYB683sKYJhT2YCbyIYiXcrcH+4zsUEVzUD1AIbPRh8Lcpm4MJYKhfJkfYgRE7MJQQjxna6+6vArwg26JcAP3D3Lnd/
hWCoj27TgJZcXjwMjnYzKxniukVypoAQOTH93YRloJuzHCYYKweCcXzmm9lAf4NFwJETqE1kSCggRE7MU8CN4Q1cKglu6/kM8BuCmxUlzGwKwa0pu20F3gDgwb076oE7M0YFnW1my8LnFUCLu3cM1y8kkk0BIXJiHiEYVXMT8Avg78MupR8SjJi6Gbib4E5kreE6j9E3MD5GcOOoHWb2PMG9K7rvdXAFsCbeX0FkYBrNVWSImdl4D+40VkGwV3Gxu78SjuH/y3C6v4PT3a/xX8Bn/fj3TReJjc5iEhl6j4Y3dikEPh/uWeDuh83scwT3DX65v5XDGyH9SOEg+aY9CBERiaRjECIiEkkBISIikRQQIiISSQEhIiKRFBAiIhLp/wN39a3mu0ahxAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# plot CV误差曲线\n",
    "test_means = grid.cv_results_[ 'mean_test_score' ]\n",
    "test_stds = grid.cv_results_[ 'std_test_score' ]\n",
    "train_means = grid.cv_results_[ 'mean_train_score' ]\n",
    "train_stds = grid.cv_results_[ 'std_train_score' ]\n",
    "\n",
    "\n",
    "# plot results\n",
    "n_Cs = len(Cs)\n",
    "number_penaltys = len(penaltys)\n",
    "test_scores = np.array(test_means).reshape(n_Cs,number_penaltys)\n",
    "train_scores = np.array(train_means).reshape(n_Cs,number_penaltys)\n",
    "test_stds = np.array(test_stds).reshape(n_Cs,number_penaltys)\n",
    "train_stds = np.array(train_stds).reshape(n_Cs,number_penaltys)\n",
    "\n",
    "x_axis = np.log10(Cs)\n",
    "for i, value in enumerate(penaltys):\n",
    "    #pyplot.plot(log(Cs), test_scores[i], label= 'penalty:'   + str(value))\n",
    "    plt.errorbar(x_axis, -test_scores[:,i], yerr=test_stds[:,i] ,label = penaltys[i] +' Test')\n",
    "    #plt.errorbar(x_axis, -train_scores[:,i], yerr=train_stds[:,i] ,label = penaltys[i] +' Train')\n",
    "    \n",
    "plt.legend()\n",
    "plt.xlabel( 'log(C)' )                                                                                                      \n",
    "plt.ylabel( 'logloss' )\n",
    "plt.savefig('LogisticGridSearchCV_C.png' )\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# log损失\n",
    "# 0.47602950669723\n",
    "\n",
    "# tfidf_log\n",
    "# 0.4757639254634074\n",
    "\n",
    "# tfidf+org_log\n",
    "# 0.4770298527015175\n",
    "\n",
    "# 正确率\n",
    "# 0.7747395833333334\n",
    "\n",
    "\n",
    "最优选择正确率，使用log损失的话tfidf损失稍微比其他小一些"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "pickle.dump(grid.best_estimator_, open(\"L1_org_tfidf.pkl\", 'wb'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "######最后这个特征会报错，麻烦老师看作业时告诉我下为什么#####"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "X has 8 features per sample; expecting 16",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-45-9254b96fa438>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;31m# # y_org_pred = lr_best_org.predict_proba(X_train)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m \u001b[0;31m# # y_tfidf_pred = lr_best_tfidf.predict_proba(X_train)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0my_org_tfidf_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlr_best_org_tfidf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_proba\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py\u001b[0m in \u001b[0;36mpredict_proba\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m   1652\u001b[0m                                                 self.solver == 'liblinear')))\n\u001b[1;32m   1653\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0movr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1654\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_predict_proba_lr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1655\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1656\u001b[0m             \u001b[0mdecision\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecision_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/base.py\u001b[0m in \u001b[0;36m_predict_proba_lr\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m    301\u001b[0m         \u001b[0mmulticlass\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mhandled\u001b[0m \u001b[0mby\u001b[0m \u001b[0mnormalizing\u001b[0m \u001b[0mthat\u001b[0m \u001b[0mover\u001b[0m \u001b[0mall\u001b[0m \u001b[0mclasses\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    302\u001b[0m         \"\"\"\n\u001b[0;32m--> 303\u001b[0;31m         \u001b[0mprob\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecision_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    304\u001b[0m         \u001b[0mexpit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprob\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprob\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    305\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mprob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/base.py\u001b[0m in \u001b[0;36mdecision_function\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m    268\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mn_features\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    269\u001b[0m             raise ValueError(\"X has %d features per sample; expecting %d\"\n\u001b[0;32m--> 270\u001b[0;31m                              % (X.shape[1], n_features))\n\u001b[0m\u001b[1;32m    271\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    272\u001b[0m         scores = safe_sparse_dot(X, self.coef_.T,\n",
      "\u001b[0;31mValueError\u001b[0m: X has 8 features per sample; expecting 16"
     ]
    }
   ],
   "source": [
    "\n",
    "\n",
    "\n",
    "# lr_best_org = pickle.load(open('train_org.pkl','rb'))\n",
    "# lr_best_tfidf = pickle.load(open('L1_tfidf.pkl','rb'))\n",
    "lr_best_org_tfidf =  pickle.load(open('L1_org_tfidf.pkl','rb'))\n",
    "\n",
    "# # y_org_pred = lr_best_org.predict_proba(X_train)\n",
    "# # y_tfidf_pred = lr_best_tfidf.predict_proba(X_train)\n",
    "y_org_tfidf_pred = lr_best_org_tfidf.predict_proba(X_train)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(768, 2)"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# y_org_pred.shape\n",
    "# y_tfidf_pred.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
